# Load the combined per-player stats and shot-type table
# (dunk / rim / other-2pt / 3pt columns plus per-game stats).
df <- read_csv(file = "./data/combinedstatshot.csv")
## Rows: 165 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): player, dunk_tot, dunk_pct, rim_tot, rim_pct, rim_asted, other2pt_...
## dbl (24): games, games_started, mp_per_g, fg_per_g, fga_per_g, fg_pct, fg2_p...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Drop games_started and pf_per_g, then list the remaining columns.
df <- select(df, -games_started, -pf_per_g)
names(df)
##  [1] "player"         "dunk_tot"       "dunk_pct"       "rim_tot"       
##  [5] "rim_pct"        "rim_asted"      "other2pt_tot"   "other2pt_pct"  
##  [9] "other2pt_asted" "3pt_tot"        "3pt_pct"        "3pt_asted"     
## [13] "games"          "mp_per_g"       "fg_per_g"       "fga_per_g"     
## [17] "fg_pct"         "fg2_per_g"      "fg2a_per_g"     "fg2_pct"       
## [21] "fg3_per_g"      "fg3a_per_g"     "fg3_pct"        "ft_per_g"      
## [25] "fta_per_g"      "ft_pct"         "orb_per_g"      "drb_per_g"     
## [29] "trb_per_g"      "ast_per_g"      "stl_per_g"      "blk_per_g"     
## [33] "tov_per_g"      "pts_per_g"

Plot 2-point attempts against 3-point attempts

From success script

# Read the draft data set. NOTE(review): the path is hard-coded to a
# home-directory checkout, unlike the relative "./data/" path used for `df`.
path <- "~/BruinSports/data/draftdata.csv"
df_career_stats <- read_csv(file = path)
## Rows: 960 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): team_id, player, college_name, skip
## dbl (19): pick_overall, seasons, g, mp, pts, trb, ast, fg_pct, fg3_pct, ft_p...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Columns removed from the draft data before the analysis.
drop_cols <- c("team_id", "skip", "mp", "pts", "trb", "ast")

# all_of() marks drop_cols as an external character vector. Passing a bare
# external vector to select() is deprecated since tidyselect 1.1.0, and
# all_of() also errors if any listed column is missing.
df_career_stats <- df_career_stats |> select(!all_of(drop_cols))
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(drop_cols)
## 
##   # Now:
##   data %>% select(all_of(drop_cols))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Keep only players selected with an overall pick below 15.
df_lot_picks <- filter(df_career_stats, pick_overall < 15)

names(df_career_stats)
##  [1] "pick_overall" "player"       "college_name" "seasons"      "g"           
##  [6] "fg_pct"       "fg3_pct"      "ft_pct"       "mp_per_g"     "pts_per_g"   
## [11] "trb_per_g"    "ast_per_g"    "ws"           "ws_per_48"    "bpm"         
## [16] "vorp"         "year"
# Add combined points + rebounds + assists (PRA) per game, and turn the pick
# number into a factor so it groups and plots as a category.
df_lot_picks <- mutate(
  df_lot_picks,
  pra_per_g = pts_per_g + trb_per_g + ast_per_g,
  pick_overall = factor(pick_overall)
)

# Per-pick averages of minutes, points, rebounds, assists and PRA per game.
draft_means <- df_lot_picks |>
  group_by(pick_overall) |>
  summarise(
    avg_mpg = mean(mp_per_g),
    avg_ppg = mean(pts_per_g),
    avg_trbpg = mean(trb_per_g),
    avg_apg = mean(ast_per_g),
    avg_prapg = mean(pra_per_g)
  )

print(draft_means, n = 14)
## # A tibble: 14 × 6
##    pick_overall avg_mpg avg_ppg avg_trbpg avg_apg avg_prapg
##    <fct>          <dbl>   <dbl>     <dbl>   <dbl>     <dbl>
##  1 1               31.2   18.8       6.52    4.32      29.7
##  2 2               26.9   13.7       4.96    2.88      21.6
##  3 3               30.0   17.6       6.3     3.52      27.4
##  4 4               27.5   12.8       5.39    2.48      20.7
##  5 5               26.0   12.7       4.92    3.55      21.2
##  6 6               23.2   10.2       4.35    2.26      16.8
##  7 7               27.6   13.4       4.88    2.77      21.0
##  8 8               21.1    8.82      3.37    1.77      14.0
##  9 9               24.5   10.9       4.55    2.52      18.0
## 10 10              23.5   10.4       3.61    2.31      16.3
## 11 11              21.1   10.2       3.69    2.16      16.0
## 12 12              24.6   10.6       4.47    2.23      17.3
## 13 13              22.6   10.9       3.99    2.23      17.1
## 14 14              20.3    8.78      3.75    1.3       13.8
# Box plot of PRA per game for each draft pick.
ggplot(df_lot_picks, aes(x = pick_overall, y = pra_per_g)) +
  geom_boxplot() +
  labs(x = "Draft Pick", y = "Points-Rebounds-Assists Per Game")

Plotting the players in the top 25% in PRA per game

# Join the lottery-pick rows onto `df` by player name (clashing columns get
# the "_college" / "_nba" suffixes), then split each "made-attempts" shot total
# into two columns and convert those columns to numeric.
df2 <- df |>
  left_join(df_lot_picks, by = "player", suffix = c("_college", "_nba")) |>
  separate_wider_delim(
    dunk_tot,
    delim = "-",
    names = c("dunk_made", "dunk_attempts")
  ) |>
  separate_wider_delim(
    rim_tot,
    delim = "-",
    names = c("rim_made", "rim_attempts")
  ) |>
  separate_wider_delim(
    other2pt_tot,
    delim = "-",
    names = c("other2pt_made", "other2pt_attempts")
  ) |>
  mutate(across(
    c(
      dunk_made, dunk_attempts,
      rim_made, rim_attempts,
      other2pt_made, other2pt_attempts
    ),
    as.numeric
  ))

Defining the bust metric

# VORP divided by games played.
df2 <- mutate(df2, vorp_per_g = vorp / g)

# Per-pick 70th-percentile value of each NBA per-game metric.
df_top_players <- df2 |>
  group_by(pick_overall) |>
  summarise(across(
    c(pts_per_g_nba, trb_per_g_nba, ast_per_g_nba, pra_per_g, vorp_per_g),
    \(x) quantile(x, probs = 0.7)
  ))

# Per-pick 30th-percentile value of the same metrics.
df_bottom_players <- df2 |>
  group_by(pick_overall) |>
  summarise(across(
    c(pts_per_g_nba, trb_per_g_nba, ast_per_g_nba, pra_per_g, vorp_per_g),
    \(x) quantile(x, probs = 0.3)
  ))

# The metric favors big men, so the rebound cutoff uses a higher percentile
# than the other per-game stats.

# Select the players taken at one draft slot who are NOT considered busts.
#
# A player is kept when all of the following hold:
#   * at least one of PRA, assists, rebounds or points per game reaches the
#     slot's cutoff (PRA / assists / points cutoffs come from
#     `df_top_players`; the rebound cutoff is the slot's 80th percentile,
#     set higher because the metric otherwise favors big men);
#   * VORP per game is at or above the slot's median;
#   * seasons played is at least 4/5 of the years between `year` and
#     `current_year`.
#
# Cutoffs are looked up by matching `pick_overall`, not by row position, so
# they stay correct when some slot has no rows in the inputs.
#
# Arguments:
#   pick_number     Draft slot to evaluate (compared against `pick_overall`).
#   df_top_players  One row per `pick_overall` with the cutoff columns
#                   `pts_per_g_nba`, `ast_per_g_nba` and `pra_per_g`.
#   df2             Player-level table with `pick_overall`, `pra_per_g`,
#                   `pts_per_g_nba`, `ast_per_g_nba`, `trb_per_g_nba`,
#                   `vorp_per_g`, `seasons` and `year`.
#   current_year    Year the career length is measured against (default 2023).
#
# Returns the rows of `df2` at `pick_number` that pass every test; zero rows
# when no player in `df2` was taken at that slot.
#
# The former defaults `df_top_players = df_top_players` and `df2 = df2` were
# self-referential and raised "promise already under evaluation" whenever they
# were relied on, so both arguments are now simply required.
is_not_bust <- function(pick_number, df_top_players, df2, current_year = 2023) {
  slot_players <- df2 |> filter(pick_overall == pick_number)
  if (nrow(slot_players) == 0) {
    return(slot_players)
  }

  slot_cutoffs <- df_top_players |> filter(pick_overall == pick_number)
  if (nrow(slot_cutoffs) != 1) {
    stop(
      "df_top_players must contain exactly one row for pick ", pick_number,
      call. = FALSE
    )
  }
  min_pts <- slot_cutoffs$pts_per_g_nba
  min_ast <- slot_cutoffs$ast_per_g_nba
  min_pra <- slot_cutoffs$pra_per_g

  # Rebounds use a stricter (80th percentile) bar than the other stats.
  min_reb <- quantile(slot_players$trb_per_g_nba, probs = 0.8)[[1]]
  min_vorp <- median(slot_players$vorp_per_g)

  slot_players |>
    filter(
      pra_per_g >= min_pra |
        ast_per_g_nba >= min_ast |
        trb_per_g_nba >= min_reb |
        pts_per_g_nba >= min_pts
    ) |>
    filter(vorp_per_g >= min_vorp) |>
    # must also have played at least most of their career in the NBA
    filter(seasons >= 4 / 5 * (current_year - year))
}

# Select the players taken at one draft slot who ARE considered busts.
#
# A player is flagged when either of the following holds:
#   * PRA, assists, rebounds, points and VORP per game are ALL below the
#     slot's cutoffs (PRA / assists / points cutoffs come from
#     `df_bottom_players`; the rebound cutoff is the slot's 40th percentile;
#     the VORP cutoff is the slot's median); or
#   * seasons played is less than half of the years between `year` and
#     `current_year`.
#
# Cutoffs are looked up by matching `pick_overall`, not by row position, so
# they stay correct when some slot has no rows in the inputs.
#
# Arguments:
#   pick_number        Draft slot to evaluate (compared against `pick_overall`).
#   df_bottom_players  One row per `pick_overall` with the cutoff columns
#                      `pts_per_g_nba`, `ast_per_g_nba` and `pra_per_g`.
#   df2                Player-level table with `pick_overall`, `pra_per_g`,
#                      `pts_per_g_nba`, `ast_per_g_nba`, `trb_per_g_nba`,
#                      `vorp_per_g`, `seasons` and `year`.
#   current_year       Year the career length is measured against
#                      (default 2023).
#
# Returns the rows of `df2` at `pick_number` that are flagged; zero rows when
# no player in `df2` was taken at that slot.
#
# The former defaults `df_bottom_players = df_bottom_players` and `df2 = df2`
# were self-referential and raised "promise already under evaluation" whenever
# they were relied on, so both arguments are now simply required.
is_bust <- function(pick_number, df_bottom_players, df2, current_year = 2023) {
  slot_players <- df2 |> filter(pick_overall == pick_number)
  if (nrow(slot_players) == 0) {
    return(slot_players)
  }

  slot_cutoffs <- df_bottom_players |> filter(pick_overall == pick_number)
  if (nrow(slot_cutoffs) != 1) {
    stop(
      "df_bottom_players must contain exactly one row for pick ", pick_number,
      call. = FALSE
    )
  }
  low_pts <- slot_cutoffs$pts_per_g_nba
  low_ast <- slot_cutoffs$ast_per_g_nba
  low_pra <- slot_cutoffs$pra_per_g

  low_reb <- quantile(slot_players$trb_per_g_nba, probs = 0.4)[[1]]
  mid_vorp <- median(slot_players$vorp_per_g)

  # playing less than half the seasons since drafted makes you a bust
  slot_players |>
    filter(
      (pra_per_g < low_pra &
        ast_per_g_nba < low_ast &
        trb_per_g_nba < low_reb &
        pts_per_g_nba < low_pts &
        vorp_per_g < mid_vorp) |
        seasons < 1 / 2 * (current_year - year)
    )
}
# Pick 1: players who pass the non-bust criteria.
df_pick_1 <- is_not_bust(
  pick_number = 1,
  df_top_players = df_top_players,
  df2 = df2
)

# Pick 1: players flagged as busts.
df_pick_1_bust <- is_bust(
  pick_number = 1,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_1
## # A tibble: 7 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 John Wall              33            36 91.7%         116          182 63.7%  
## 2 Kyrie Irving            0             0 0%             26           39 66.7%  
## 3 Anthony Davis          96            98 98.0%         152          174 87.4%  
## 4 Karl-Anthony T…        22            24 91.7%          87          121 71.9%  
## 5 Ben Simmons            56            61 91.8%         159          220 72.3%  
## 6 Zion Williamson        72            79 91.1%         247          313 78.9%  
## 7 Anthony Edwards        27            27 100.0%         89          129 69.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 1 players flagged by is_bust().
df_pick_1_bust
## # A tibble: 1 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Anthony Bennett        53            58 91.4%         100          140 71.4%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 2: non-busts and busts.
df_pick_2 <- is_not_bust(
  pick_number = 2,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_2_bust <- is_bust(
  pick_number = 2,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_2
## # A tibble: 5 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 D'Angelo Russe…         4             4 100.0%         70          110 63.6%  
## 2 Brandon Ingram         17            17 100.0%         69          117 59.0%  
## 3 Lonzo Ball             37            40 92.5%          94          120 78.3%  
## 4 Ja Morant              28            31 90.3%         160          264 60.6%  
## 5 Chet Holmgren          57            57 100.0%        105          125 84.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 2 players flagged by is_bust().
df_pick_2_bust
## # A tibble: 1 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Derrick Willia…        56            60 93.3%         135          188 71.8%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 3: non-busts and busts.
df_pick_3 <- is_not_bust(
  pick_number = 3,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_3_bust <- is_bust(
  pick_number = 3,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_3
## # A tibble: 4 × 55
##   player       dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>            <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Bradley Beal        18            20 90.0%          89          137 65.0%  
## 2 Joel Embiid         30            30 100.0%         80           99 80.8%  
## 3 Jayson Tatum        18            21 85.7%          79          126 62.7%  
## 4 Evan Mobley         63            66 95.5%         113          144 78.5%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 3 players flagged by is_bust().
df_pick_3_bust
## # A tibble: 1 × 55
##   player        dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>             <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Jahlil Okafor        64            67 95.5%         213          270 78.9%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 4: non-busts and busts.
df_pick_4 <- is_not_bust(
  pick_number = 4,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_4_bust <- is_bust(
  pick_number = 4,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_4
## # A tibble: 4 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Aaron Gordon           54            56 96.4%         137          198 69.2%  
## 2 Jaren Jackson …        31            31 100.0%         61           93 65.6%  
## 3 Scottie Barnes         19            21 90.5%          61           89 68.5%  
## 4 Keegan Murray          63            67 94.0%         196          277 70.8%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 4 players flagged by is_bust().
df_pick_4_bust
## # A tibble: 0 × 55
## # ℹ 55 variables: player <chr>, dunk_made <dbl>, dunk_attempts <dbl>,
## #   dunk_pct <chr>, rim_made <dbl>, rim_attempts <dbl>, rim_pct <chr>,
## #   rim_asted <chr>, other2pt_made <dbl>, other2pt_attempts <dbl>,
## #   other2pt_pct <chr>, other2pt_asted <chr>, 3pt_tot <chr>, 3pt_pct <chr>,
## #   3pt_asted <chr>, games <dbl>, mp_per_g_college <dbl>, fg_per_g <dbl>,
## #   fga_per_g <dbl>, fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>,
## #   fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, …
# Pick 5: non-busts and busts.
df_pick_5 <- is_not_bust(
  pick_number = 5,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_5_bust <- is_bust(
  pick_number = 5,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_5
## # A tibble: 3 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 DeMarcus Cousi…        53            57 93.0%         144          189 76.2%  
## 2 De'Aaron Fox           20            21 95.2%         131          203 64.5%  
## 3 Trae Young              0             0 0%            105          201 52.2%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 5 players flagged by is_bust().
df_pick_5_bust
## # A tibble: 1 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Thomas Robinson        70            83 84.3%         169          262 64.5%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 6: non-busts and busts.
df_pick_6 <- is_not_bust(
  pick_number = 6,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_6_bust <- is_bust(
  pick_number = 6,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_6
## # A tibble: 5 × 55
##   player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Damian Lillard        13            17 76.5%          98          169 58.0%  
## 2 Nerlens Noel          48            50 96.0%          76           99 76.8%  
## 3 Marcus Smart          16            18 88.9%          78          110 70.9%  
## 4 Buddy Hield           18            22 81.8%         119          178 66.9%  
## 5 Onyeka Okongwu        58            61 95.1%         135          186 72.6%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 6 players flagged by is_bust().
df_pick_6_bust
## # A tibble: 1 × 55
##   player    dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>         <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ekpe Udoh        30            32 93.8%          78          109 71.6%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 7: non-busts and busts.
df_pick_7 <- is_not_bust(
  pick_number = 7,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_7_bust <- is_bust(
  pick_number = 7,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_7
## # A tibble: 3 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Julius Randle          37            40 92.5%         132          197 67.0%  
## 2 Jamal Murray           18            19 94.7%          77          111 69.4%  
## 3 Lauri Markkanen        20            24 83.3%          65          100 65.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 7 players flagged by is_bust().
df_pick_7_bust
## # A tibble: 1 × 55
##   player       dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>            <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ben McLemore        44            45 97.8%          90          126 71.4%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 8: non-busts and busts.
df_pick_8 <- is_not_bust(
  pick_number = 8,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_8_bust <- is_bust(
  pick_number = 8,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_8
## # A tibble: 3 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Al-Farouq Aminu        46            48 95.8%         112          173 64.7%  
## 2 Kentavious Cal…        15            16 93.8%          63           94 67.0%  
## 3 Franz Wagner           11            11 100.0%         63           93 67.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 8 players flagged by is_bust().
df_pick_8_bust
## # A tibble: 0 × 55
## # ℹ 55 variables: player <chr>, dunk_made <dbl>, dunk_attempts <dbl>,
## #   dunk_pct <chr>, rim_made <dbl>, rim_attempts <dbl>, rim_pct <chr>,
## #   rim_asted <chr>, other2pt_made <dbl>, other2pt_attempts <dbl>,
## #   other2pt_pct <chr>, other2pt_asted <chr>, 3pt_tot <chr>, 3pt_pct <chr>,
## #   3pt_asted <chr>, games <dbl>, mp_per_g_college <dbl>, fg_per_g <dbl>,
## #   fga_per_g <dbl>, fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>,
## #   fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, …
# Pick 9: non-busts and busts.
df_pick_9 <- is_not_bust(
  pick_number = 9,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_9_bust <- is_bust(
  pick_number = 9,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_9
## # A tibble: 5 × 55
##   player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Gordon Hayward        19            20 95.0%          89          128 69.5%  
## 2 Kemba Walker           3             3 100.0%        115          196 58.7%  
## 3 Andre Drummond        80            89 89.9%         130          185 70.3%  
## 4 Trey Burke             9             9 100.0%         67          105 63.8%  
## 5 Jakob Poeltl          32            34 94.1%         199          284 70.1%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 9 players flagged by is_bust().
df_pick_9_bust
## # A tibble: 1 × 55
##   player     dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>          <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Kevin Knox        18            20 90.0%          65           99 65.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 10: non-busts and busts.
df_pick_10 <- is_not_bust(
  pick_number = 10,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_10_bust <- is_bust(
  pick_number = 10,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_10
## # A tibble: 5 × 55
##   player        dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>             <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Paul George          18            22 81.8%          70          106 66.0%  
## 2 CJ McCollum           3             3 100.0%         34           63 54.0%  
## 3 Elfrid Payton        21            24 87.5%         169          247 68.4%  
## 4 Mikal Bridges        35            42 83.3%         109          161 67.7%  
## 5 Jalen Smith          49            52 94.2%         114          158 72.2%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 10 players flagged by is_bust().
df_pick_10_bust
## # A tibble: 2 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ziaire Williams        10            11 90.9%          26           49 53.1%  
## 2 Johnny Davis           16            19 84.2%          89          143 62.2%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 11: non-busts and busts.
df_pick_11 <- is_not_bust(
  pick_number = 11,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_11_bust <- is_bust(
  pick_number = 11,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_11
## # A tibble: 4 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Klay Thompson           8             8 100.0%         66          110 60.0%  
## 2 Myles Turner           11            13 84.6%          40           54 74.1%  
## 3 Domantas Sabon…        22            24 91.7%         157          214 73.4%  
## 4 Shai Gilgeous-…        11            11 100.0%        108          182 59.3%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 11 players flagged by is_bust().
df_pick_11_bust
## # A tibble: 2 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 James Bouknight        12            12 100.0%         52           79 65.8%  
## 2 Jett Howard             6             6 100.0%         29           47 61.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 12 (twelfth overall): non-busts and busts.
df_pick_12 <- is_not_bust(
  pick_number = 12,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_12_bust <- is_bust(
  pick_number = 12,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_12
## # A tibble: 5 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Steven Adams           29            33 87.9%          85          129 65.9%  
## 2 Miles Bridges          30            35 85.7%          84          128 65.6%  
## 3 Tyrese Halibur…         7             8 87.5%          46           62 74.2%  
## 4 Jalen Williams         25            27 92.6%         124          186 66.7%  
## 5 Dereck Lively …        54            55 98.2%          74           96 77.1%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 12 players flagged by is_bust().
df_pick_12_bust
## # A tibble: 1 × 55
##   player       dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>            <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Xavier Henry        17            17 100.0%         60           90 66.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 13: non-busts and busts.
df_pick_13 <- is_not_bust(
  pick_number = 13,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_13_bust <- is_bust(
  pick_number = 13,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_13
## # A tibble: 7 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ed Davis               26            27 96.3%          42           50 84.0%  
## 2 Kelly Olynyk           25            28 89.3%         152          212 71.7%  
## 3 Zach LaVine            21            25 84.0%          51           90 56.7%  
## 4 Devin Booker            8             9 88.9%          42           59 71.2%  
## 5 Donovan Mitche…         9            13 69.2%          64          116 55.2%  
## 6 Tyler Herro             4             5 80.0%          56           84 66.7%  
## 7 Jalen Duren            70            76 92.1%         111          152 73.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Preview the pick-13 bust table.
df_pick_13_bust
## # A tibble: 2 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Kendall Marsha…         0             0 0%             35           53 66.0%  
## 2 Jerome Robinson        12            13 92.3%          98          157 62.4%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Fourteenth overall pick: build the good-value and bust subsets for pick 14.
# NOTE(review): is_not_bust()/is_bust() are defined outside this section; they
# presumably filter the top/bottom player tables by pick number and attach the
# college stats held in df2 -- confirm against their definitions.
df_pick_14 <- is_not_bust(14, df_top_players, df2)
df_pick_14_bust <- is_bust(14, df_bottom_players, df2)

# Preview the pick-14 good-value table.
df_pick_14
## # A tibble: 4 × 55
##   player        dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>             <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Marcus Morris        31            33 93.9%         114          147 77.6%  
## 2 T.J. Warren          37            37 100.0%        192          251 76.5%  
## 3 Cameron Payne         3             3 100.0%         53           87 60.9%  
## 4 Bam Adebayo          99           105 94.3%         138          185 74.6%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Preview the pick-14 bust table.
df_pick_14_bust
## # A tibble: 1 × 55
##   player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Romeo Langford        10            14 71.4%          91          138 65.9%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Stack the per-pick tables for picks 1-14 into one good-value table and one
# bust table (rows are appended in pick order).
df_good <- bind_rows(list(
  df_pick_1, df_pick_2, df_pick_3, df_pick_4, df_pick_5,
  df_pick_6, df_pick_7, df_pick_8, df_pick_9, df_pick_10,
  df_pick_11, df_pick_12, df_pick_13, df_pick_14
))

df_busts <- bind_rows(list(
  df_pick_1_bust, df_pick_2_bust, df_pick_3_bust, df_pick_4_bust,
  df_pick_5_bust, df_pick_6_bust, df_pick_7_bust, df_pick_8_bust,
  df_pick_9_bust, df_pick_10_bust, df_pick_11_bust, df_pick_12_bust,
  df_pick_13_bust, df_pick_14_bust
))

# Show the first 20 rows of the good-value table.
print(df_good, n = 20)
## # A tibble: 64 × 55
##    player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##    <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
##  1 John Wall             33            36 91.7%         116          182 63.7%  
##  2 Kyrie Irving           0             0 0%             26           39 66.7%  
##  3 Anthony Davis         96            98 98.0%         152          174 87.4%  
##  4 Karl-Anthony …        22            24 91.7%          87          121 71.9%  
##  5 Ben Simmons           56            61 91.8%         159          220 72.3%  
##  6 Zion Williams…        72            79 91.1%         247          313 78.9%  
##  7 Anthony Edwar…        27            27 100.0%         89          129 69.0%  
##  8 D'Angelo Russ…         4             4 100.0%         70          110 63.6%  
##  9 Brandon Ingram        17            17 100.0%         69          117 59.0%  
## 10 Lonzo Ball            37            40 92.5%          94          120 78.3%  
## 11 Ja Morant             28            31 90.3%         160          264 60.6%  
## 12 Chet Holmgren         57            57 100.0%        105          125 84.0%  
## 13 Bradley Beal          18            20 90.0%          89          137 65.0%  
## 14 Joel Embiid           30            30 100.0%         80           99 80.8%  
## 15 Jayson Tatum          18            21 85.7%          79          126 62.7%  
## 16 Evan Mobley           63            66 95.5%         113          144 78.5%  
## 17 Aaron Gordon          54            56 96.4%         137          198 69.2%  
## 18 Jaren Jackson…        31            31 100.0%         61           93 65.6%  
## 19 Scottie Barnes        19            21 90.5%          61           89 68.5%  
## 20 Keegan Murray         63            67 94.0%         196          277 70.8%  
## # ℹ 44 more rows
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>, …
# List the name of every player in the good-value table.
df_good |> pull(player)
##  [1] "John Wall"                "Kyrie Irving"            
##  [3] "Anthony Davis"            "Karl-Anthony Towns"      
##  [5] "Ben Simmons"              "Zion Williamson"         
##  [7] "Anthony Edwards"          "D'Angelo Russell"        
##  [9] "Brandon Ingram"           "Lonzo Ball"              
## [11] "Ja Morant"                "Chet Holmgren"           
## [13] "Bradley Beal"             "Joel Embiid"             
## [15] "Jayson Tatum"             "Evan Mobley"             
## [17] "Aaron Gordon"             "Jaren Jackson Jr."       
## [19] "Scottie Barnes"           "Keegan Murray"           
## [21] "DeMarcus Cousins"         "De'Aaron Fox"            
## [23] "Trae Young"               "Damian Lillard"          
## [25] "Nerlens Noel"             "Marcus Smart"            
## [27] "Buddy Hield"              "Onyeka Okongwu"          
## [29] "Julius Randle"            "Jamal Murray"            
## [31] "Lauri Markkanen"          "Al-Farouq Aminu"         
## [33] "Kentavious Caldwell-Pope" "Franz Wagner"            
## [35] "Gordon Hayward"           "Kemba Walker"            
## [37] "Andre Drummond"           "Trey Burke"              
## [39] "Jakob Poeltl"             "Paul George"             
## [41] "CJ McCollum"              "Elfrid Payton"           
## [43] "Mikal Bridges"            "Jalen Smith"             
## [45] "Klay Thompson"            "Myles Turner"            
## [47] "Domantas Sabonis"         "Shai Gilgeous-Alexander" 
## [49] "Steven Adams"             "Miles Bridges"           
## [51] "Tyrese Haliburton"        "Jalen Williams"          
## [53] "Dereck Lively II"         "Ed Davis"                
## [55] "Kelly Olynyk"             "Zach LaVine"             
## [57] "Devin Booker"             "Donovan Mitchell"        
## [59] "Tyler Herro"              "Jalen Duren"             
## [61] "Marcus Morris"            "T.J. Warren"             
## [63] "Cameron Payne"            "Bam Adebayo"
# Preview the combined bust table.
df_busts
## # A tibble: 15 × 55
##    player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##    <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
##  1 Anthony Benne…        53            58 91.4%         100          140 71.4%  
##  2 Derrick Willi…        56            60 93.3%         135          188 71.8%  
##  3 Jahlil Okafor         64            67 95.5%         213          270 78.9%  
##  4 Thomas Robins…        70            83 84.3%         169          262 64.5%  
##  5 Ekpe Udoh             30            32 93.8%          78          109 71.6%  
##  6 Ben McLemore          44            45 97.8%          90          126 71.4%  
##  7 Kevin Knox            18            20 90.0%          65           99 65.7%  
##  8 Ziaire Willia…        10            11 90.9%          26           49 53.1%  
##  9 Johnny Davis          16            19 84.2%          89          143 62.2%  
## 10 James Bouknig…        12            12 100.0%         52           79 65.8%  
## 11 Jett Howard            6             6 100.0%         29           47 61.7%  
## 12 Xavier Henry          17            17 100.0%         60           90 66.7%  
## 13 Kendall Marsh…         0             0 0%             35           53 66.0%  
## 14 Jerome Robins…        12            13 92.3%          98          157 62.4%  
## 15 Romeo Langford        10            14 71.4%          91          138 65.9%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# List the name of every player in the bust table.
df_busts |> pull(player)
##  [1] "Anthony Bennett"  "Derrick Williams" "Jahlil Okafor"    "Thomas Robinson" 
##  [5] "Ekpe Udoh"        "Ben McLemore"     "Kevin Knox"       "Ziaire Williams" 
##  [9] "Johnny Davis"     "James Bouknight"  "Jett Howard"      "Xavier Henry"    
## [13] "Kendall Marshall" "Jerome Robinson"  "Romeo Langford"
# Good-value picks: college 2PT makes vs 3PT makes per game, one translucent
# green point per player with a repelled name label.
plot_good <- ggplot(
  data = df_good,
  mapping = aes(x = fg2_per_g, y = fg3_per_g)
) +
  geom_point(color = "green", size = 4, alpha = 0.5) +
  # The label layer inherits df_good from the plot itself.
  geom_label_repel(aes(label = player), size = 1.5, max.overlaps = 20) +
  labs(
    title = "CBB Shot Selection for Good Value NBA Lottery Picks",
    x = "2PT Field Goal Makes per game",
    y = "3PT Field Goal Makes per game"
  ) +
  theme_bw()

# Busts: college 2PT makes vs 3PT makes per game, one translucent red point
# per player with a repelled name label.
plot_busts <- ggplot(
  data = df_busts,
  mapping = aes(x = fg2_per_g, y = fg3_per_g)
) +
  geom_point(color = "red", size = 4, alpha = 0.5) +
  geom_label_repel(aes(label = player), size = 3) +
  labs(
    title = "CBB Shot Selection for NBA Lottery Busts",
    x = "2PT Field Goal Makes per game",
    y = "3PT Field Goal Makes per game"
  ) +
  theme_bw()


# Combined view: every other player in df2 as a grey baseline, with the busts
# (red) and good-value picks (green) overlaid and distinguished in the legend.
plot_combined <- ggplot(df2, aes(x = fg2_per_g, y = fg3_per_g)) +
  # Baseline layer: leave out players that are drawn by the bust/good layers,
  # otherwise each of them also gets a grey "Average value" point underneath,
  # which mislabels them and dulls the overlay colour through alpha blending.
  geom_point(
    data = df2 |>
      filter(!player %in% c(df_good$player, df_busts$player)),
    aes(color = "Average value"),
    size = 4,
    alpha = 0.2
  ) +
  geom_point(
    data = df_busts,
    aes(color = "Bad value"),
    size = 4,
    alpha = 0.5
  ) +
  geom_point(
    data = df_good,
    aes(color = "Good value"),
    size = 4,
    alpha = 0.5
  ) +
  labs(
    title = "CBB Shot Selection for NBA Lottery Picks",
    x = "2PT Field Goal Makes per game",
    y = "3PT Field Goal Makes per game"
  ) +
  # Constant colour aesthetics above are mapped to these legend entries.
  scale_color_manual(
    values = c(
      "Bad value" = "red",
      "Good value" = "green",
      "Average value" = "grey"
    )
  ) +
  theme_bw()

# Render the three scatter plots: all picks, busts only, good-value only.
plot_combined

plot_busts

plot_good

Principal Component Analysis of college stats, for grouping/covariance

library(corrr)
library(ggcorrplot)
library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

Preparing Data

# List the columns of df2 (the feature selection below picks from these).
colnames(df2)
##  [1] "player"            "dunk_made"         "dunk_attempts"    
##  [4] "dunk_pct"          "rim_made"          "rim_attempts"     
##  [7] "rim_pct"           "rim_asted"         "other2pt_made"    
## [10] "other2pt_attempts" "other2pt_pct"      "other2pt_asted"   
## [13] "3pt_tot"           "3pt_pct"           "3pt_asted"        
## [16] "games"             "mp_per_g_college"  "fg_per_g"         
## [19] "fga_per_g"         "fg_pct_college"    "fg2_per_g"        
## [22] "fg2a_per_g"        "fg2_pct"           "fg3_per_g"        
## [25] "fg3a_per_g"        "fg3_pct_college"   "ft_per_g"         
## [28] "fta_per_g"         "ft_pct_college"    "orb_per_g"        
## [31] "drb_per_g"         "trb_per_g_college" "ast_per_g_college"
## [34] "stl_per_g"         "blk_per_g"         "tov_per_g"        
## [37] "pts_per_g_college" "pick_overall"      "college_name"     
## [40] "seasons"           "g"                 "fg_pct_nba"       
## [43] "fg3_pct_nba"       "ft_pct_nba"        "mp_per_g_nba"     
## [46] "pts_per_g_nba"     "trb_per_g_nba"     "ast_per_g_nba"    
## [49] "ws"                "ws_per_48"         "bpm"              
## [52] "vorp"              "year"              "pra_per_g"        
## [55] "vorp_per_g"
# Move the player names into row names so every remaining column is a stat.
df3 <- df2 |> column_to_rownames(var = "player")

# College feature table: shot-location counts and rates plus box-score stats.
# `3pt_asted` is renamed to the syntactic name fg3_asted as it is selected.
df_cbb <- df3 |>
  select(
    dunk_made, dunk_attempts, dunk_pct,
    rim_made, rim_attempts, rim_pct, rim_asted,
    other2pt_made, other2pt_attempts, other2pt_pct, other2pt_asted,
    fg2_pct, fg3_per_g, fg3a_per_g,
    fg3_asted = `3pt_asted`,
    games, ft_per_g, fta_per_g, ast_per_g_college,
    orb_per_g, drb_per_g, stl_per_g, blk_per_g, tov_per_g,
    pts_per_g_college
  ) |>
  mutate(
    # Percent strings such as "66.7%" become proportions (0.667).
    across(
      c(
        dunk_pct, rim_pct, rim_asted,
        other2pt_pct, other2pt_asted, fg3_asted
      ),
      \(pct) parse_number(pct) / 100
    ),
    # 3PT accuracy from per-game makes / attempts; a missing ratio is set to 0.
    fg3_pct_per_g = coalesce(fg3_per_g / fg3a_per_g, 0),
    # Only the newly created column is placed; existing columns stay put.
    .after = fg3_asted
  )

# Convert a total into a per-game rate.
#   x:     numeric total(s).
#   games: number of games to divide by (vectorised against x).
# Returns x / games.
to_per_game <- function(x, games) {
  x / games
}

# Put the shot-location totals on a per-game basis by dividing each one by the
# player's `games` column.
df_cbb <- mutate(
  df_cbb,
  across(
    c(
      dunk_made, dunk_attempts,
      rim_made, rim_attempts,
      other2pt_made, other2pt_attempts
    ),
    \(total) to_per_game(total, games)
  )
)

# Count the missing values in each column.
colSums(is.na(df_cbb))
##         dunk_made     dunk_attempts          dunk_pct          rim_made 
##                 0                 0                 0                 0 
##      rim_attempts           rim_pct         rim_asted     other2pt_made 
##                 0                 0                 0                 0 
## other2pt_attempts      other2pt_pct    other2pt_asted           fg2_pct 
##                 0                 0                 0                 0 
##         fg3_per_g        fg3a_per_g         fg3_asted     fg3_pct_per_g 
##                 0                 0                 0                 0 
##             games          ft_per_g         fta_per_g ast_per_g_college 
##                 0                 0                 0                 0 
##         orb_per_g         drb_per_g         stl_per_g         blk_per_g 
##                 0                 0                 0                 0 
##         tov_per_g pts_per_g_college 
##                 0                 0

Following this guide: https://www.datacamp.com/tutorial/pca-analysis-r

# Centre and scale every feature except `games`, then store as a tibble.
df_cbb_scaled <- df_cbb |>
  select(-games) |>
  scale() |>
  as_tibble()
df_cbb_scaled
## # A tibble: 165 × 25
##    dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct rim_asted
##        <dbl>         <dbl>    <dbl>    <dbl>        <dbl>   <dbl>     <dbl>
##  1     0.360        0.381    0.147    0.898        1.16    -0.569   -0.927 
##  2    -0.777       -0.801    0.334   -0.908       -1.09     1.49    -1.46  
##  3     1.28         1.26     0.328    1.47         1.12     1.46     0.671 
##  4     0.623        0.680    0.0540   0.492        0.395    0.600    0.742 
##  5     1.16         1.17     0.228    1.45         1.19     1.17     0.642 
##  6     0.267        0.254    0.278    0.0749      -0.0119   0.531   -0.0603
##  7    -0.546       -0.575    0.384    0.247        0.380   -0.430    0.0517
##  8     0.121        0.0815   0.403   -0.231       -0.159   -0.430    0.0753
##  9    -0.622       -0.648    0.353   -0.670       -0.737    0.238    0.624 
## 10    -0.606       -0.559   -0.469   -0.822       -0.844   -0.249   -0.615 
## # ℹ 155 more rows
## # ℹ 18 more variables: other2pt_made <dbl>, other2pt_attempts <dbl>,
## #   other2pt_pct <dbl>, other2pt_asted <dbl>, fg2_pct <dbl>, fg3_per_g <dbl>,
## #   fg3a_per_g <dbl>, fg3_asted <dbl>, fg3_pct_per_g <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>,
## #   stl_per_g <dbl>, blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>
# df_cbb_scaled$player <- df_cbb$player
# df_cbb_scaled <- df_cbb_scaled |> relocate(player, .before = dunk_made)

# Pairwise correlations between the standardised features.
corr_matrix <- df_cbb_scaled |> cor()

# Full correlation heat map.
ggcorrplot(corr_matrix, method = "square")

# Lower triangle only, with hierarchical-clustering ordering switched on.
ggcorrplot(
  corr_matrix,
  method = "square",
  type = "lower",
  hc.order = TRUE
)

# Fit the PCA on the standardised observations (one row per player).
# Passing corr_matrix as `x` would make princomp() treat its 25 rows as 25
# observations and decompose the wrong data; a precomputed covariance or
# correlation matrix is only accepted through the `covmat` argument.
# NOTE: the printed summary below must be regenerated after this change.
pca <- princomp(df_cbb_scaled)
summary(pca)
## Importance of components:
##                           Comp.1    Comp.2     Comp.3     Comp.4    Comp.5
## Standard deviation     1.5264968 0.8275209 0.38297252 0.26189097 0.2392287
## Proportion of Variance 0.6731171 0.1978139 0.04236762 0.01981253 0.0165320
## Cumulative Proportion  0.6731171 0.8709310 0.91329860 0.93311112 0.9496431
##                           Comp.6     Comp.7      Comp.8      Comp.9     Comp.10
## Standard deviation     0.2338592 0.19672875 0.166119165 0.136620686 0.092515541
## Proportion of Variance 0.0157982 0.01117981 0.007971468 0.005391775 0.002472454
## Cumulative Proportion  0.9654413 0.97662114 0.984592603 0.989984378 0.992456832
##                            Comp.11     Comp.12     Comp.13      Comp.14
## Standard deviation     0.087727419 0.077813416 0.063038666 0.0522112215
## Proportion of Variance 0.002223154 0.001749072 0.001147923 0.0007874564
## Cumulative Proportion  0.994679986 0.996429058 0.997576981 0.9983644376
##                             Comp.15      Comp.16      Comp.17      Comp.18
## Standard deviation     0.0466045703 0.0419416771 0.0315945772 0.0227293361
## Proportion of Variance 0.0006274163 0.0005081482 0.0002883526 0.0001492356
## Cumulative Proportion  0.9989918540 0.9995000022 0.9997883548 0.9999375904
##                             Comp.19      Comp.20      Comp.21      Comp.22
## Standard deviation     1.371363e-02 4.393920e-03 2.341039e-03 1.590896e-03
## Proportion of Variance 5.432546e-05 5.577032e-06 1.583129e-06 7.311092e-07
## Cumulative Proportion  9.999919e-01 9.999975e-01 9.999991e-01 9.999998e-01
##                             Comp.23      Comp.24      Comp.25
## Standard deviation     7.221375e-04 3.825110e-04 7.167572e-09
## Proportion of Variance 1.506394e-07 4.226557e-08 1.484031e-17
## Cumulative Proportion  1.000000e+00 1.000000e+00 1.000000e+00
# Scree plot of the princomp fit, with value labels.
fviz_eig(
  pca,
  addlabels = TRUE,
  title = "Principal Components Scree Plot"
)

# Variable plot, coloured by cos2 on a light-blue-to-black gradient.
fviz_pca_var(
  pca,
  col.var = "cos2",
  gradient.cols = c("lightblue", "black"),
  repel = TRUE,
  title = "Contributions of Variables to Components 1 and 2"
)

# cos2 of each variable over the first two components.
fviz_cos2(pca, axes = 1:2, choice = "var")

Guides: http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials/

K-means clustering guide: https://medium.com/@zullinira23/implementation-of-principal-component-analysis-pca-on-k-means-clustering-in-r-794f03ec15f

# Draw 100% of the rows without replacement, i.e. keep every player but in
# random order, then peek at the first few rows.
df_cbb.sample <- sample_frac(df_cbb, size = 1, replace = FALSE)
head(df_cbb.sample)
##                          dunk_made dunk_attempts dunk_pct  rim_made
## Dion Waiters             0.3661972     0.4084507    0.897 1.2676056
## Buddy Hield              0.1363636     0.1666667    0.818 0.9015152
## Kentavious Caldwell-Pope 0.2343750     0.2500000    0.938 0.9843750
## Jalen Williams           0.2976190     0.3214286    0.926 1.4761905
## Dennis Smith Jr.         0.6875000     0.6875000    1.000 3.2187500
## Ochai Agbaji             0.3032787     0.3360656    0.902 0.9754098
##                          rim_attempts rim_pct rim_asted other2pt_made
## Dion Waiters                 2.028169   0.625     0.378     0.5492958
## Buddy Hield                  1.348485   0.669     0.277     0.2651515
## Kentavious Caldwell-Pope     1.468750   0.670     0.349     0.6093750
## Jalen Williams               2.214286   0.667     0.355     0.6309524
## Dennis Smith Jr.             5.031250   0.640     0.155     1.0312500
## Ochai Agbaji                 1.344262   0.726     0.529     0.2950820
##                          other2pt_attempts other2pt_pct other2pt_asted fg2_pct
## Dion Waiters                     1.4084507        0.390          0.282   0.513
## Buddy Hield                      0.7651515        0.347          0.057   0.508
## Kentavious Caldwell-Pope         1.7500000        0.348          0.385   0.504
## Jalen Williams                   1.6071429        0.393          0.170   0.525
## Dennis Smith Jr.                 3.3437500        0.308          0.030   0.509
## Ochai Agbaji                     1.0327869        0.286          0.389   0.526
##                          fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games
## Dion Waiters                   1.0        2.8     0.780     0.3571429    71
## Buddy Hield                    2.6        6.8     0.687     0.3823529   132
## Kentavious Caldwell-Pope       2.3        6.9     0.583     0.3333333    64
## Jalen Williams                 1.1        3.0     0.714     0.3666667    84
## Dennis Smith Jr.               1.7        4.8     0.418     0.3541667    32
## Ochai Agbaji                   2.0        5.5     0.854     0.3636364   122
##                          ft_per_g fta_per_g ast_per_g_college orb_per_g
## Dion Waiters                  1.8       2.3               2.0       0.4
## Buddy Hield                   2.8       3.4               1.9       1.6
## Kentavious Caldwell-Pope      2.9       3.9               1.5       1.3
## Jalen Williams                2.6       3.3               2.9       0.5
## Dennis Smith Jr.              4.5       6.3               6.2       0.8
## Ochai Agbaji                  1.8       2.5               1.6       1.0
##                          drb_per_g stl_per_g blk_per_g tov_per_g
## Dion Waiters                   1.5       1.5       0.2       1.1
## Buddy Hield                    3.4       1.3       0.3       2.2
## Kentavious Caldwell-Pope       4.8       1.9       0.4       1.6
## Jalen Williams                 3.2       1.2       0.5       1.6
## Dennis Smith Jr.               3.8       1.9       0.4       3.4
## Ochai Agbaji                   3.4       1.0       0.5       1.7
##                          pts_per_g_college
## Dion Waiters                           9.7
## Buddy Hield                           17.4
## Kentavious Caldwell-Pope              15.8
## Jalen Williams                        12.6
## Dennis Smith Jr.                      18.1
## Ochai Agbaji                          13.5
# PCA with FactoMineR on the shuffled feature table, `games` dropped.
# scale.unit = TRUE standardises each variable; graph = FALSE turns off
# FactoMineR's own plots (factoextra is used for plotting below).
df_cbb.pca <- PCA(df_cbb.sample |> select(-games), scale.unit = TRUE, graph = FALSE)

# Scree plot with value labels.
fviz_eig(df_cbb.pca, addlabels = TRUE)

# Variable plot, coloured by cos2 on a light-blue-to-black gradient.
fviz_pca_var(df_cbb.pca, col.var = "cos2", gradient.cols = c("lightblue", "black"), repel = TRUE)

# Extract the variable-level results of the PCA.
# NOTE(review): the object name `var` is shared with stats::var(); consider a
# more specific name if nothing downstream depends on it.
var <- get_pca_var(df_cbb.pca)

# cos2 of each variable over dimensions 1 and 2.
fviz_cos2(df_cbb.pca, choice = "var", axes = 1:2)

# Individuals (players) plot with repelled labels.
fviz_pca_ind(df_cbb.pca, repel = TRUE, labelsize = 1)

# Print the eigenvalue table and the individual/variable summaries.
summary(df_cbb.pca)
## 
## Call:
## PCA(X = select(df_cbb.sample, -games), scale.unit = TRUE, graph = FALSE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance               7.910   5.334   2.090   1.463   1.278   1.196   1.058
## % of var.             31.642  21.336   8.359   5.852   5.113   4.784   4.231
## Cumulative % of var.  31.642  52.978  61.336  67.189  72.302  77.085  81.316
##                        Dim.8   Dim.9  Dim.10  Dim.11  Dim.12  Dim.13  Dim.14
## Variance               0.980   0.728   0.551   0.447   0.419   0.332   0.269
## % of var.              3.922   2.911   2.203   1.787   1.677   1.329   1.075
## Cumulative % of var.  85.238  88.149  90.352  92.140  93.817  95.146  96.221
##                       Dim.15  Dim.16  Dim.17  Dim.18  Dim.19  Dim.20  Dim.21
## Variance               0.254   0.210   0.164   0.138   0.072   0.062   0.022
## % of var.              1.017   0.840   0.655   0.553   0.286   0.247   0.088
## Cumulative % of var.  97.238  98.079  98.734  99.287  99.574  99.821  99.909
##                       Dim.22  Dim.23  Dim.24  Dim.25
## Variance               0.012   0.006   0.004   0.002
## % of var.              0.046   0.023   0.014   0.007
## Cumulative % of var.  99.955  99.978  99.993 100.000
## 
## Individuals (the 10 first)
##                              Dist    Dim.1    ctr   cos2    Dim.2    ctr   cos2
## Dion Waiters             |  4.206 | -1.746  0.234  0.172 | -3.003  1.024  0.510
## Buddy Hield              |  4.345 | -3.249  0.809  0.559 | -1.080  0.132  0.062
## Kentavious Caldwell-Pope |  3.786 | -2.159  0.357  0.325 | -0.907  0.093  0.057
## Jalen Williams           |  2.683 | -1.639  0.206  0.373 | -1.607  0.293  0.359
## Dennis Smith Jr.         |  5.454 | -2.630  0.530  0.233 |  3.845  1.679  0.497
## Ochai Agbaji             |  3.950 | -1.316  0.133  0.111 | -2.815  0.901  0.508
## Bradley Beal             |  2.277 | -0.914  0.064  0.161 |  0.487  0.027  0.046
## Dereck Lively II         |  8.949 |  5.280  2.136  0.348 | -4.948  2.782  0.306
## Greg Monroe              |  3.685 |  0.629  0.030  0.029 |  0.824  0.077  0.050
## Brandon Ingram           |  3.997 | -1.765  0.239  0.195 |  1.668  0.316  0.174
##                             Dim.3    ctr   cos2  
## Dion Waiters             | -0.643  0.120  0.023 |
## Buddy Hield              | -0.500  0.073  0.013 |
## Kentavious Caldwell-Pope | -0.034  0.000  0.000 |
## Jalen Williams           | -0.587  0.100  0.048 |
## Dennis Smith Jr.         | -2.043  1.210  0.140 |
## Ochai Agbaji             |  0.036  0.000  0.000 |
## Bradley Beal             | -0.205  0.012  0.008 |
## Dereck Lively II         | -1.268  0.466  0.020 |
## Greg Monroe              | -1.667  0.806  0.205 |
## Brandon Ingram           |  2.167  1.362  0.294 |
## 
## Variables (the 10 first)
##                             Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3
## dunk_made                |  0.834  8.788  0.695 |  0.336  2.113  0.113 | -0.023
## dunk_attempts            |  0.828  8.662  0.685 |  0.337  2.132  0.114 | -0.023
## dunk_pct                 |  0.343  1.489  0.118 | -0.070  0.091  0.005 |  0.214
## rim_made                 |  0.585  4.330  0.343 |  0.673  8.503  0.454 | -0.093
## rim_attempts             |  0.453  2.598  0.205 |  0.740 10.256  0.547 | -0.097
## rim_pct                  |  0.728  6.706  0.530 | -0.141  0.373  0.020 | -0.002
## rim_asted                |  0.768  7.456  0.590 | -0.352  2.327  0.124 |  0.180
## other2pt_made            |  0.109  0.149  0.012 |  0.603  6.823  0.364 |  0.563
## other2pt_attempts        |  0.102  0.132  0.010 |  0.651  7.952  0.424 |  0.490
## other2pt_pct             |  0.015  0.003  0.000 | -0.039  0.029  0.002 |  0.358
##                             ctr   cos2  
## dunk_made                 0.025  0.001 |
## dunk_attempts             0.025  0.001 |
## dunk_pct                  2.193  0.046 |
## rim_made                  0.416  0.009 |
## rim_attempts              0.448  0.009 |
## rim_pct                   0.000  0.000 |
## rim_asted                 1.558  0.033 |
## other2pt_made            15.195  0.318 |
## other2pt_attempts        11.501  0.240 |
## other2pt_pct              6.122  0.128 |
# Same analysis with base R's prcomp(), centred and scaled.
# `games` is dropped so this fit uses the same 25 features as the other PCA
# fits in this analysis (it is the per-game denominator, not a feature), and
# the argument is spelled `scale.` instead of relying on partial matching of
# `scale`.
# NOTE: the printed summary below must be regenerated after this change.
pca2 <- prcomp(
  df_cbb.sample |> select(-games),
  center = TRUE,
  scale. = TRUE
)
summary(pca2)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.8233 2.3564 1.45379 1.28788 1.18415 1.09891 1.03423
## Proportion of Variance 0.3066 0.2135 0.08129 0.06379 0.05393 0.04645 0.04114
## Cumulative Proportion  0.3066 0.5201 0.60142 0.66521 0.71914 0.76559 0.80673
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.99030 0.85552 0.75400 0.67049 0.65151 0.64104 0.57399
## Proportion of Variance 0.03772 0.02815 0.02187 0.01729 0.01633 0.01581 0.01267
## Cumulative Proportion  0.84445 0.87260 0.89447 0.91176 0.92808 0.94389 0.95656
##                           PC15    PC16    PC17    PC18    PC19    PC20    PC21
## Standard deviation     0.51590 0.49111 0.40960 0.40167 0.37014 0.26727 0.20134
## Proportion of Variance 0.01024 0.00928 0.00645 0.00621 0.00527 0.00275 0.00156
## Cumulative Proportion  0.96680 0.97607 0.98253 0.98873 0.99400 0.99675 0.99831
##                           PC22    PC23    PC24    PC25    PC26
## Standard deviation     0.14559 0.10740 0.07652 0.06000 0.04282
## Proportion of Variance 0.00082 0.00044 0.00023 0.00014 0.00007
## Cumulative Proportion  0.99912 0.99957 0.99979 0.99993 1.00000
# Keep the scores on the first two principal components for every player,
# with the sign flipped, as a data frame.
# NOTE(review): the negation presumably aligns the axis direction with the
# FactoMineR fit -- confirm; it does not change distances between players.
df_cluster <- as.data.frame(-pca2$x[, c("PC1", "PC2")])
df_cluster
##                                  PC1          PC2
## Dion Waiters             -1.87877434  2.893422978
## Buddy Hield              -3.50692348  1.416444649
## Kentavious Caldwell-Pope -2.20137247  0.823482753
## Jalen Williams           -1.77088011  1.662712781
## Dennis Smith Jr.         -2.38981682 -4.058571848
## Ochai Agbaji             -1.61424758  3.117399223
## Bradley Beal             -0.82754947 -0.659943331
## Dereck Lively II          5.13940729  4.908354365
## Greg Monroe               0.61040396 -0.650297864
## Brandon Ingram           -1.61026723 -1.916191506
## Mikal Bridges            -1.14420389  3.554794904
## Derrick Williams          1.06807992 -1.073410109
## Ben McLemore              0.30783252 -0.245872619
## Anthony Edwards          -1.36140263 -2.673735875
## Josh Jackson              1.57310640 -3.071593787
## Kira Lewis Jr.           -3.22512852 -0.145447070
## Zach LaVine              -1.55093364  2.642329927
## Aaron Nesmith            -3.24624148  1.628776546
## Justise Winslow          -0.32450165 -0.046720801
## Kevin Knox               -0.57857116 -0.787030146
## Jarace Walker             1.37801187  1.216966913
## Noah Vonleh               1.16775866 -0.188969620
## Willie Cauley-Stein       2.67767592  4.026223965
## Michael Kidd-Gilchrist    1.54339794 -0.537228608
## Kemba Walker             -3.16087458 -0.642457328
## Myles Turner              1.91803535  1.800563503
## Jett Howard              -2.88030411  1.519978771
## Evan Turner              -1.44633779 -0.509352088
## Domantas Sabonis          1.73763383  1.176518418
## Anthony Davis             6.81946695 -1.298925467
## Bennedict Mathurin       -1.03736333  1.047637079
## D'Angelo Russell         -2.92106171 -2.705140788
## Joel Embiid               4.56071760  0.275049579
## Steven Adams              3.71212322  2.800032293
## Chris Duarte             -1.86046277  1.292904206
## Jayson Tatum             -0.72039348 -2.664410902
## Kelly Olynyk              0.78055387  3.858127881
## Paolo Banchero            0.78117820 -2.804053329
## Paul George              -2.04066776  0.155321706
## Jalen Duren               6.04834658 -0.567839906
## DeMarcus Cousins          4.47722765 -2.532951574
## De'Andre Hunter          -0.73977851  2.045387990
## Jimmer Fredette          -4.10068583  0.151455721
## Andrew Wiggins            0.40394987 -2.619085299
## Ziaire Williams          -1.73956057  0.236909097
## Kendall Marshall         -4.50594244  1.991570055
## Markieff Morris           1.34708852  3.999247075
## Deandre Ayton             6.42063201 -3.989709140
## Michael Carter-Williams  -2.86581601  0.870790993
## John Wall                -1.68160821 -3.975501776
## Kyrie Irving             -3.69754992 -2.860691262
## Cason Wallace            -2.27311780  0.159975101
## Stanley Johnson          -1.12015696 -1.170691584
## Davion Mitchell          -3.46328985  1.590099336
## Otto Porter Jr.           0.11050188  1.773914232
## Joshua Primo             -1.51844668  3.740079647
## Anthony Black            -1.15287196 -1.737863300
## Markelle Fultz           -1.98389283 -6.289069268
## John Henson               3.00626308  3.180396627
## Donovan Mitchell         -3.10251794  1.824734500
## Jabari Parker             2.47210802 -3.877981214
## Trey Burke               -3.56814702 -0.621441435
## Wes Johnson               1.59432891 -1.355658714
## Jahlil Okafor             5.53352532 -2.780697880
## Andre Drummond            6.01362530  0.579810217
## Karl-Anthony Towns        2.12840084  1.109330782
## Marvin Bagley III         5.74422617 -4.591555606
## Tristan Thompson         -4.06381690  2.262810456
## Ed Davis                  3.34988242  3.186735688
## Marcus Smart             -2.54328344 -2.015237976
## Doug McDermott           -0.97886742  1.321678910
## Jalen Suggs              -1.51193273 -1.416722416
## Julius Randle             2.24340507 -3.042945784
## Miles Bridges            -0.43665877  0.849012232
## Zion Williamson           5.17455606 -4.695617011
## Johnny Davis             -1.29838193  0.145475806
## Tyrese Haliburton        -2.20631834  2.697991218
## Damian Lillard           -4.26753179 -0.405236246
## Alex Len                  3.65666018  2.221479147
## P.J. Washington           0.38430532  1.057924680
## Obi Toppin                3.10013537  0.850875977
## Jaren Jackson Jr.         1.54989689  1.654063788
## Trae Young               -6.57771252 -7.880998042
## James Bouknight          -1.50440966  0.805096400
## Patrick Patterson         2.59675241  2.071578251
## Anthony Bennett           3.16583380 -1.008475683
## Derrick Favors            5.07612783 -0.537442192
## Ja Morant                -2.44350644 -3.154068779
## Cole Aldrich              2.80512022  3.923034303
## Bam Adebayo               6.03442637 -1.151787352
## Gordon Hayward           -0.63712837  0.809277017
## Al-Farouq Aminu           1.06979323 -0.683184259
## Devin Booker             -1.24174959  3.484009154
## Brandon Miller           -1.94836905 -2.026234708
## Keegan Murray             1.65497692  0.671771098
## Tyler Herro              -1.75186786  0.416344735
## Jabari Smith Jr.         -1.57911124 -1.495506781
## Jamal Murray             -2.32101496 -1.773731573
## Shai Gilgeous-Alexander  -1.99930055 -2.359093850
## Trey Lyles                2.31079038  2.311364621
## Romeo Langford           -0.69108021 -2.226529738
## Alec Burks               -0.72391129 -2.047902304
## Marquese Chriss           2.85951816 -0.454169608
## Brandon Knight           -3.48956878 -2.033951063
## Mo Bamba                  5.19088002 -0.667639378
## Coby White               -3.30501478 -0.962609638
## Xavier Henry             -1.19039770  0.836284356
## Isaac Okoro               0.45583166 -0.276546204
## Nerlens Noel              5.52149230 -0.801447386
## Rui Hachimura             0.93351176  2.636481966
## Cameron Payne            -4.43284878 -1.397687085
## Terrence Ross            -1.12336118  2.567881207
## Frank Kaminsky           -0.17667352  4.085571358
## Elfrid Payton            -1.97980456 -0.903624480
## Harrison Barnes          -1.07392023  0.415393391
## Patrick Williams         -0.07429544  1.354172663
## Ben Simmons               3.63357550 -6.421547748
## Austin Rivers            -2.94309828 -1.408832101
## Jeremy Sochan             0.94012964  1.962196272
## Jakob Poeltl              3.09307807  1.010869663
## T.J. Warren               1.14870807 -0.419904979
## Marcus Morris             0.78619850  2.586158338
## Cody Zeller               2.04375199 -0.560514715
## Taylor Hendricks          1.22550512  0.175647086
## Jerome Robinson          -3.01145976  0.204910540
## Ekpe Udoh                 2.95349083 -2.108737125
## Collin Sexton            -2.49870233 -3.953433538
## Jaxson Hayes              6.14378844  1.935755954
## Kris Dunn                -2.64831964  0.054392871
## De'Aaron Fox             -0.82524429 -3.344308266
## Wendell Carter Jr.        3.43851631 -0.759548294
## Jonathan Isaac            1.55579355  0.429327692
## Gradey Dick              -1.38567770  1.245442843
## Jaden Ivey               -1.84082641  0.056522486
## Jordan Hawkins           -3.10079731  2.711257786
## Jaylen Brown             -0.83240004 -2.294511909
## Cameron Johnson          -1.40190265  2.112733416
## Malik Monk               -2.24035661 -1.712986893
## RJ Barrett               -0.01227205 -5.110094647
## Thomas Robinson           0.46560196  2.442928178
## Cade Cunningham          -2.85426045 -4.449564443
## Evan Mobley               4.34455690 -2.691116412
## Onyeka Okongwu            5.85709792 -2.599723240
## Chet Holmgren             4.24371197 -0.005887613
## Lauri Markkanen          -0.13400874  0.115337390
## Luke Kennard             -2.73577444  0.919701787
## Lonzo Ball               -0.20427619 -0.082601232
## Scottie Barnes           -0.34079523 -0.018038969
## Nik Stauskas             -3.10769654  1.847553292
## Jarrett Culver           -1.47235138  0.024087543
## Meyers Leonard            2.79244626  3.646226533
## Shabazz Muhammad          0.41381159 -2.159011494
## Moses Moody              -1.43065318 -1.445225669
## Denzel Valentine         -3.02166326  3.193070266
## Zach Collins              2.29766335  2.206084028
## Cam Reddish              -4.08549065  0.100918560
## Jalen Smith               2.16080003  1.786709370
## CJ McCollum              -3.87520407 -0.619831203
## Aaron Gordon              2.77173027 -0.539925608
## Jeremy Lamb              -0.44879947  2.590888791
## Victor Oladipo           -0.59336620  2.543388232
## Taurean Prince           -1.41666106  3.713851811
## Franz Wagner             -1.19544539  2.167513895
## Devin Vassell            -0.68178092  4.075928100
## Klay Thompson            -3.79667554  0.207371725
# Diagnostics for choosing the number of clusters: elbow (within-cluster sum
# of squares), average silhouette width, and the gap statistic.
# Seed the RNG first: k-means starts from random centers and the gap statistic
# is computed from bootstrap reference samples, so without a seed the curves
# change from run to run.
# NOTE(review): fviz_nbclust() only evaluates k up to its k.max argument
# (documented default 10); the k used for the final fit sits at that bound, so
# consider raising k.max to confirm the choice.
set.seed(42)

# `nstart` is forwarded to kmeans() so every candidate k is fit from several
# random starts instead of a single one.
fviz_nbclust(df_cluster, kmeans, method = "wss", nstart = 25)

fviz_nbclust(df_cluster, kmeans, method = "silhouette", nstart = 25)

fviz_nbclust(df_cluster, kmeans, method = "gap_stat", nstart = 25)

# Final k-means fit on df_cluster, followed by a cluster plot of the result.
# k-means starts from random centers, so the RNG seed is fixed here: without
# it the cluster numbering (and possibly membership) can change on every run,
# which would invalidate any interpretation tied to specific group numbers.
set.seed(42)
k <- 10
df_cbb.kmeans <- kmeans(df_cluster, centers = k, nstart = 50)

# Plot the fitted clusters with small repelled labels and no centroid markers.
fviz_cluster(
  df_cbb.kmeans,
  data = df_cluster,
  labelsize = 5,
  pointsize = 1,
  show.clust.cent = FALSE,
  repel = TRUE
)

# Store each row's k-means cluster label in a new `group` column, place that
# column immediately before `dunk_made`, and preview the first rows.
# NOTE(review): the assignment is positional -- it assumes the rows of
# df_cbb.sample are in the same order as the rows that were clustered; confirm.
df_cbb.sample[["group"]] <- df_cbb.kmeans[["cluster"]]
df_cbb.sample <- relocate(df_cbb.sample, group, .before = dunk_made)
head(df_cbb.sample, n = 6L)
##                          group dunk_made dunk_attempts dunk_pct  rim_made
## Dion Waiters                 7 0.3661972     0.4084507    0.897 1.2676056
## Buddy Hield                  8 0.1363636     0.1666667    0.818 0.9015152
## Kentavious Caldwell-Pope     8 0.2343750     0.2500000    0.938 0.9843750
## Jalen Williams               7 0.2976190     0.3214286    0.926 1.4761905
## Dennis Smith Jr.             1 0.6875000     0.6875000    1.000 3.2187500
## Ochai Agbaji                 7 0.3032787     0.3360656    0.902 0.9754098
##                          rim_attempts rim_pct rim_asted other2pt_made
## Dion Waiters                 2.028169   0.625     0.378     0.5492958
## Buddy Hield                  1.348485   0.669     0.277     0.2651515
## Kentavious Caldwell-Pope     1.468750   0.670     0.349     0.6093750
## Jalen Williams               2.214286   0.667     0.355     0.6309524
## Dennis Smith Jr.             5.031250   0.640     0.155     1.0312500
## Ochai Agbaji                 1.344262   0.726     0.529     0.2950820
##                          other2pt_attempts other2pt_pct other2pt_asted fg2_pct
## Dion Waiters                     1.4084507        0.390          0.282   0.513
## Buddy Hield                      0.7651515        0.347          0.057   0.508
## Kentavious Caldwell-Pope         1.7500000        0.348          0.385   0.504
## Jalen Williams                   1.6071429        0.393          0.170   0.525
## Dennis Smith Jr.                 3.3437500        0.308          0.030   0.509
## Ochai Agbaji                     1.0327869        0.286          0.389   0.526
##                          fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games
## Dion Waiters                   1.0        2.8     0.780     0.3571429    71
## Buddy Hield                    2.6        6.8     0.687     0.3823529   132
## Kentavious Caldwell-Pope       2.3        6.9     0.583     0.3333333    64
## Jalen Williams                 1.1        3.0     0.714     0.3666667    84
## Dennis Smith Jr.               1.7        4.8     0.418     0.3541667    32
## Ochai Agbaji                   2.0        5.5     0.854     0.3636364   122
##                          ft_per_g fta_per_g ast_per_g_college orb_per_g
## Dion Waiters                  1.8       2.3               2.0       0.4
## Buddy Hield                   2.8       3.4               1.9       1.6
## Kentavious Caldwell-Pope      2.9       3.9               1.5       1.3
## Jalen Williams                2.6       3.3               2.9       0.5
## Dennis Smith Jr.              4.5       6.3               6.2       0.8
## Ochai Agbaji                  1.8       2.5               1.6       1.0
##                          drb_per_g stl_per_g blk_per_g tov_per_g
## Dion Waiters                   1.5       1.5       0.2       1.1
## Buddy Hield                    3.4       1.3       0.3       2.2
## Kentavious Caldwell-Pope       4.8       1.9       0.4       1.6
## Jalen Williams                 3.2       1.2       0.5       1.6
## Dennis Smith Jr.               3.8       1.9       0.4       3.4
## Ochai Agbaji                   3.4       1.0       0.5       1.7
##                          pts_per_g_college
## Dion Waiters                           9.7
## Buddy Hield                           17.4
## Kentavious Caldwell-Pope              15.8
## Jalen Williams                        12.6
## Dennis Smith Jr.                      18.1
## Ochai Agbaji                          13.5
# Cluster profile: mean of every non-grouping column within each `group`.
df_cbb.sample |>
  group_by(group) |>
  summarise(across(everything(), mean))
## # A tibble: 10 × 27
##    group dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##    <int>     <dbl>         <dbl>    <dbl>    <dbl>        <dbl>   <dbl>
##  1     1     0.414         0.457    0.869    2.38          3.82   0.626
##  2     2     0.28          0.3      0.466    3.06          5.44   0.570
##  3     3     2.03          2.15     0.945    5.13          6.72   0.764
##  4     4     0.464         0.510    0.917    1.64          2.46   0.671
##  5     5     1.13          1.23     0.918    3.19          4.80   0.669
##  6     6     0.927         1.01     0.925    2.59          3.74   0.695
##  7     7     0.271         0.306    0.890    1.03          1.55   0.674
##  8     8     0.127         0.145    0.830    0.973         1.58   0.616
##  9     9     0.644         0.695    0.922    1.56          2.12   0.745
## 10    10     2.06          2.16     0.959    3.51          4.49   0.787
## # ℹ 20 more variables: rim_asted <dbl>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <dbl>, other2pt_asted <dbl>,
## #   fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_asted <dbl>,
## #   fg3_pct_per_g <dbl>, games <dbl>, ft_per_g <dbl>, fta_per_g <dbl>,
## #   ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, stl_per_g <dbl>,
## #   blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>
# Cluster profile: median of every non-grouping column within each `group`.
df_cbb.sample |>
  group_by(group) |>
  summarise(across(everything(), median))
## # A tibble: 10 × 27
##    group dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##    <int>     <dbl>         <dbl>    <dbl>    <dbl>        <dbl>   <dbl>
##  1     1     0.412         0.438    0.909    2.41          4      0.634
##  2     2     0.28          0.3      0.466    3.06          5.44   0.570
##  3     3     1.99          2.09     0.953    5.05          6.65   0.774
##  4     4     0.465         0.493    0.917    1.59          2.48   0.666
##  5     5     1.04          1.12     0.925    3.28          4.87   0.673
##  6     6     0.85          0.9      0.933    2.59          3.66   0.699
##  7     7     0.283         0.294    0.901    1.03          1.45   0.684
##  8     8     0.128         0.137    0.934    0.963         1.54   0.613
##  9     9     0.569         0.667    0.939    1.68          2.22   0.748
## 10    10     2.24          2.31     0.958    3.63          4.56   0.776
## # ℹ 20 more variables: rim_asted <dbl>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <dbl>, other2pt_asted <dbl>,
## #   fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_asted <dbl>,
## #   fg3_pct_per_g <dbl>, games <dbl>, ft_per_g <dbl>, fta_per_g <dbl>,
## #   ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, stl_per_g <dbl>,
## #   blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>